In [1]:
library(data.table)
library(foreach)
library(doParallel)
In [2]:
# Load the burden table (columns used below: metr, Region, Disease, burden).
# stringsAsFactors is set explicitly so that metr, Region and Disease are
# factors regardless of the R version (the default became FALSE in R 4.0.0).
GBD <- read.table("../Data/DALY_YLL_deaths_per_region_and_27_diseases_2005.txt",
                  stringsAsFactors = TRUE)
# Sort by metric, then region, then disease.
GBD <- GBD[order(GBD$metr, GBD$Region, GBD$Disease), ]
In [3]:
#Burden of all diseases in all regions for each metric of burden
# Sum the burden of the Region "All" rows within each metric and append the
# result as Disease "all". The metric labels come from the names of the
# tapply() result, so they line up with the totals whether metr is a factor
# or a character column.
is_all_region <- GBD$Region == "All"
all_totals <- tapply(GBD$burden[is_all_region], GBD$metr[is_all_region], sum)
GBD <- rbind(GBD, data.frame(metr = names(all_totals),
                             Region = "All",
                             Disease = "all",
                             burden = as.vector(all_totals)))
rownames(GBD) <- NULL
GBD <- GBD[order(GBD$metr, GBD$Region, GBD$Disease), ]

#Burden in Non-HI
# Non-HI burden = Region "All" burden minus Region "High-income" burden.
# Rows are paired on the (metr, Disease) key instead of by position, so a
# missing High-income row stops the script rather than being silently
# recycled or misaligned.
DNHI <- GBD[GBD$Region == "All", ]
HI <- GBD[GBD$Region == "High-income", ]
hi_pos <- match(paste(DNHI$metr, DNHI$Disease, sep = "\t"),
                paste(HI$metr, HI$Disease, sep = "\t"))
stopifnot(!anyNA(hi_pos))
DNHI$burden <- DNHI$burden - HI$burden[hi_pos]
DNHI$Region <- "Non-HI"
GBD <- rbind(GBD, DNHI)
# Rebuild the factor so that "Non-HI" is a proper level.
GBD$Region <- as.factor(as.character(GBD$Region))
GBD <- GBD[order(GBD$metr, GBD$Region, GBD$Disease), ]

#Local proportions of burden across diseases for each metric of burden
# Prp = 100 * burden / burden of the Disease "all" row of the same
# (metr, Region) group. The denominator is looked up by key; every group must
# contain exactly one "all" row.
grp_key <- paste(GBD$metr, GBD$Region, sep = "\t")
total_rows <- which(GBD$Disease == "all")
stopifnot(!anyDuplicated(grp_key[total_rows]))
total_pos <- total_rows[match(grp_key, grp_key[total_rows])]
stopifnot(!anyNA(total_pos))
GBD$Prp <- 100 * GBD$burden / GBD$burden[total_pos]
GBD <- data.table(GBD)
In [4]:
# Disease-group labels; Mgbd$x[k] is used later as the name of disease index k.
Mgbd <- read.table("../Data/27_gbd_groups.txt")
# Replicate files available on disk. Only names of the exact form
# "Metrics_over_replicates_<digits>.txt" are kept, so an unrelated file in the
# directory cannot turn into an NA disease index.
sms <- list.files("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/",
                  pattern = "^Metrics_over_replicates_[0-9]+\\.txt$")
# The disease index is the number between the 24-character prefix and ".txt".
dis <- as.numeric(substr(sms, 25, nchar(sms) - 4))
# Index 0 is excluded (presumably the all-diseases file -- TODO confirm).
dis <- dis[dis != 0]
In [5]:
#We upload all replicates
#We will measure alignment only over diseases for which we have replicates
# One table per disease index in `dis`, stacked into DT afterwards. The list is
# preallocated and indexed with seq_along() so an empty `dis` does not iterate
# over c(1, 0).
L <- vector("list", length(dis))
for (i in seq_along(dis)) {
  k <- dis[i]
  DF <- fread(paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
                     as.character(k), ".txt"))
  regs <- unique(DF$Region)
  # Rows for the disease itself and rows for all diseases together.
  is_dis <- DF$Dis == "dis"
  is_all <- DF$Dis == "all"
  L[[i]] <- data.table(
    Disease = Mgbd$x[k],
    Region = DF$Region[is_dis],
    # Share (in %) of RCTs / patients for this disease among all diseases;
    # "dis" and "all" rows are paired by position.
    Prop_RCTs = 100 * DF$RCTs[is_dis] / DF$RCTs[is_all],
    Prop_Patients = 100 * DF$Patients[is_dis] / DF$Patients[is_all],
    # Replicate number: half of the rows are "dis" rows, and each replicate is
    # assumed to contribute one consecutive row per region -- TODO confirm
    # against the layout of the replicate files.
    sim_nb = rep(1:((nrow(DF) / length(regs)) / 2), each = length(regs)))
}
DT <- rbindlist(L)
#replicate per disease
DT$k <- paste(DT$Disease, DT$sim_nb)
#nb replicates available per disease
NS <- DT[, max(sim_nb), by = "Disease"]
In [6]:
# Number of Monte-Carlo draws and a fixed seed for reproducibility.
NK <- 10000
set.seed(1234)
#For each disease, we sample 10,000 replicates, giving local proportions of research
# Result: one row per disease (row names = disease), one column per draw; each
# entry is a replicate number drawn with replacement from 1..(replicates
# available for that disease).
sims <- do.call(
  rbind,
  tapply(NS$V1, NS$Disease,
         function(n_avail) sample(seq_len(n_avail), size = NK, replace = TRUE))
)
In [7]:
# Compute the alignment measures for each of the NK draws on 4 workers.
cl <- makeCluster(4)
registerDoParallel(cl)
t0 <- proc.time()
# stopCluster() sits in `finally` so the workers are released even if an
# iteration fails.
A <- tryCatch(
  foreach(k = seq_len(NK), .packages = "data.table") %dopar% {
    # Replicate number drawn for each disease in draw k.
    x <- sims[, k]
    # Keep the drawn replicate of every disease and join it to the burden
    # table on the columns the two tables share.
    dtt <- merge(GBD, DT[DT$k %in% paste(rownames(sims), x), ])
    # Per (metr, Region), summed over diseases:
    #   V1: Prp - Prop_RCTs       where Prp >= Prop_RCTs
    #   V2: Prp/2 - Prop_RCTs     where Prp >= 2*Prop_RCTs
    #   V3: Prp - Prop_Patients   where Prp >= Prop_Patients
    #   V4: Prp/2 - Prop_Patients where Prp >= 2*Prop_Patients
    # V4 previously subtracted Prop_RCTs, a copy-paste slip from V2.
    dtt[, .(sum(abs(Prp[Prp >= Prop_RCTs] - Prop_RCTs[Prp >= Prop_RCTs])),
            sum(abs(Prp[Prp >= 2 * Prop_RCTs] / 2 - Prop_RCTs[Prp >= 2 * Prop_RCTs])),
            sum(abs(Prp[Prp >= Prop_Patients] - Prop_Patients[Prp >= Prop_Patients])),
            sum(abs(Prp[Prp >= 2 * Prop_Patients] / 2 - Prop_Patients[Prp >= 2 * Prop_Patients]))),
        by = .(metr, Region)]
  },
  finally = stopCluster(cl)
)
# Elapsed time in minutes.
((proc.time() - t0)/60)
In [8]:
# Stack the per-draw result tables into a single data.table.
Al <- rbindlist(l = A)
In [9]:
# Preview the first rows of the stacked results.
head(x = Al, n = 6L)
In [10]:
# Summarise the draws: for every (metr, Region) pair take the 2.5%, 50% and
# 97.5% quantiles of each of the four measures, labelled "low", "med" and "up".
AUI <- Al[,
          c(list(UI = c("low", "med", "up")),
            lapply(.SD, quantile, probs = c(0.025, 0.5, 0.975))),
          by = .(metr, Region),
          .SDcols = c("V1", "V2", "V3", "V4")]
# Put the label first, then give the measures descriptive names.
setcolorder(AUI, c("UI", "Region", "metr", "V1", "V2", "V3", "V4"))
setnames(AUI, c("UI", "Region", "metr",
                "RCTs_fill", "RCTs_nogap", "Patients_fill", "Patients_nogap"))
In [11]:
# Save the quantile summary next to the input data.
write.table(
  x = AUI,
  file = "../Data/Alignment_ratios_within_regions_across_diseases_wt_sims_patients_metrs_burdens.txt"
)
In [ ]: